# Prepare complete analysis data set
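# Notes: Combines all 2020 data with older data in all available states.
#        Executing this script builds the final analysis dataset. No other
#        files need to be executed independently, provided the source() calls
#        below are uncommented or the analysis CSVs read below already exist.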
# By Dom Valentino and Chris Kenny
# libs --------------------------------------------------------------------
library(plyr) # rbind.fill()
library(dplyr) # select()
library(haven) # read_dta()

# set wd to source file location
# The relative "../data/..." paths used below are resolved against this
# script's folder. The folder is found via RStudio when the script is run
# there, or via the --file= argument when it is run with Rscript. If neither
# is available the current working directory is kept and a message is shown,
# instead of stopping with an rstudioapi error.
local({
  script_dir <- NULL

  # Interactive RStudio session: use the path of the active document.
  if (requireNamespace("rstudioapi", quietly = TRUE) &&
      rstudioapi::isAvailable()) {
    doc_path <- rstudioapi::getActiveDocumentContext()$path
    # An unsaved document has an empty path; dirname("") is not a usable wd.
    if (length(doc_path) == 1L && nzchar(doc_path)) {
      script_dir <- dirname(doc_path)
    }
  }

  # Rscript / R --file=...: the script path is on the command line.
  if (is.null(script_dir)) {
    file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE),
                     value = TRUE)
    if (length(file_arg) > 0L) {
      script_dir <- dirname(sub("^--file=", "", file_arg[1L]))
    }
  }

  if (is.null(script_dir)) {
    message("Could not determine the script location; keeping working ",
            "directory: ", getwd())
  } else {
    setwd(script_dir)
  }
})

# table of contents -------------------------------------------------------
# source: execute all other scripts needed before this one
# data: read data
# combine: rbind all data together

# source (for replication purposes) ---------------------------------------
# source("prep_pre-pandemic_returns.R")
# source("prep_mt_returns.R")
# source("prep_nv_returns.R")
# source("prep_ut_returns.R")
# source("prep_vt_returns.R")
# source("prep_co_returns.R")
# source("prep_nj_returns.R")
# source("prep_wa_returns.R")
# source("prep_hi_returns.R")
# source("prep_ca_returns.R")

# data --------------------------------------------------------------------
# One analysis CSV for the pre-pandemic data and one for each state.
final_pre <- read.csv(file = "../data/analysis/analysis_pre-pandemic.csv")
final_ca <- read.csv(file = "../data/analysis/analysis_ca.csv")
final_co <- read.csv(file = "../data/analysis/analysis_co.csv")
final_hi <- read.csv(file = "../data/analysis/analysis_hi.csv")
final_mt <- read.csv(file = "../data/analysis/analysis_mt.csv")
final_nj <- read.csv(file = "../data/analysis/analysis_nj.csv")
final_nv <- read.csv(file = "../data/analysis/analysis_nv.csv")
final_ut <- read.csv(file = "../data/analysis/analysis_ut.csv")
final_vt <- read.csv(file = "../data/analysis/analysis_vt.csv")
final_wa <- read.csv(file = "../data/analysis/analysis_wa.csv")

# Stata file, trimmed to the columns that are carried into the combined data.
thompson <- select(
  read_dta("../data/modified data/analysis.dta"),
  state, county, county_id, year,
  ballots_cast, cvap, cvap_moe, cvap_approx, treat,
  dem_share_gov, dem_share_pres, dem_share_sen,
  turnout_share
)

# Drop every object in the workspace other than the data frames read above.
# `keep` is not listed in itself, so the same rm() call removes it as well.
keep <- c(
  "final_pre", "final_nv", "final_mt", "final_ut", "final_vt", "final_co",
  "final_nj", "final_wa", "final_hi", "final_ca", "thompson"
)
rm(list = setdiff(ls(), keep))

# combine -----------------------------------------------------------------
# Stack the data frames pairwise from left to right with rbind.fill(), which
# fills columns missing from a frame with NA. Then add the squared year and
# group by county within state.
final <- group_by(
  mutate(
    Reduce(
      rbind.fill,
      list(
        final_pre, final_nv, final_mt, final_ut, final_vt, final_co,
        final_nj, final_wa, final_hi, final_ca, thompson
      )
    ),
    year2 = year^2
  ),
  state, county
)

# county_id: one integer per (state, county) group. This replaces any
# county_id values carried in from the input files.
grpid <- group_indices(final)
final <- group_by(
  mutate(ungroup(final), county_id = grpid),
  state, year
)

# state_year_id: one integer per (state, year) group. The result is left
# ungrouped.
grpid <- group_indices(final)
final <- mutate(ungroup(final), state_year_id = grpid)

# division ----------------------------------------------------------------
# The original `case_when()` call had no cases, which dplyr rejects with an
# error, so the script stopped here before anything was written to disk.
# NOTE(review): `division` is assumed to mean the U.S. Census division of each
# state -- confirm against the analysis code that consumes this column.

# Look up the Census division for a vector of state identifiers.
#   state:   state names or two-letter postal abbreviations (any case).
#   returns: character vector of division names, NA where nothing matches
#            (e.g. DC or territories, which base R's state data omit).
lookup_census_division <- function(state) {
  key <- toupper(trimws(as.character(state)))
  # Names occupy positions 1-50 and abbreviations 51-100 of the lookup table,
  # so the modulo folds either kind of hit back onto the shared state index.
  hit <- match(key, c(toupper(state.name), state.abb))
  as.character(state.division)[(hit - 1L) %% length(state.abb) + 1L]
}

final <- mutate(final, division = lookup_census_division(state))

# write -------------------------------------------------------------------
# Save the combined data set as both a CSV file and a Stata file.
# write_csv() lives in readr, which this script never attaches, so the call
# is namespace-qualified to keep it from failing in a fresh session.
readr::write_csv(final, "../data/analysis/analysis.csv")
write_dta(final, "../data/analysis/analysis.dta")
